# Required Packages
import pandas as pd
import numpy as np
# Modeling
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
# preprocessing
from sklearn.preprocessing import StandardScaler
# keras
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD, Adagrad
from keras.utils.vis_utils import plot_model
import keras.backend as K
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex
## progress bar
import progressbar
## seaborn
import seaborn as sns
sns.set_context('paper', rc={'font.size':12,'axes.titlesize':14,'axes.labelsize':12})
sns.set_style('white')
## matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
import matplotlib.gridspec as gridspec
import matplotlib.colors
from pylab import rcParams
plt.style.use('seaborn-whitegrid')
plt.rcParams['figure.figsize'] = 14, 8
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
%config InlineBackend.figure_format = 'retina'
## missingno
import missingno as msno
import warnings
warnings.filterwarnings("ignore")
![]()
In this article, we investigate a simulated dataset that mimics customer behavior on the Starbucks rewards mobile app. Starbucks tends to send out offers to users of the mobile app once every few days. These offers are exclusive; that is, not all users receive the same offer. An offer can contain a discount for their products or sometimes a BOGO (buy one, get one free) deal. These offers have a validity period before the offer expires. The article here is inspired by a towardsdatascience.com article.
def Line(N):
    """Return a horizontal rule made of N '=' characters."""
    return '=' * N
def Header(Inp, Length = 120):
    """Print a colored section title padded to Length columns with a '=' rule."""
    title = Back.BLACK + Fore.CYAN + Style.NORMAL + f'{Inp}' + Style.RESET_ALL
    rule = Fore.BLUE + Style.NORMAL + ' ' + Line(Length - len(Inp) - 1) + Style.RESET_ALL
    print(title + rule)
def Bottom(Length = 120):
    """Print a closing '=' rule spanning Length columns."""
    print(f'{Fore.BLUE}{Style.NORMAL}{Line(Length)}{Style.RESET_ALL}')
# Load the three pre-cleaned Starbucks datasets and preview the first rows of
# each. NOTE(review): paths are relative to the working directory — confirm the
# 'StarBucks/' folder is present where the notebook runs.
# Portfolio Dataset
Header('Portfolio Dataset:')
Portfolio = pd.read_csv('StarBucks/Portfolio_Clean.csv')
display(Portfolio.head().style.hide_index())
# Profile Dataset
Header('Profile Dataset:')
Profile = pd.read_csv('StarBucks/Profile_Clean.csv')
display(Profile.head().style.hide_index())
# Transcript Dataset
Header('Transcript Dataset:')
Transcript = pd.read_csv('StarBucks/Transcript_Clean.csv')
display(Transcript.head().style.hide_index())
Bottom()
# Per-user aggregated features (modeling input) and the combined dataset used
# later to recover user names for the prediction table.
User_Data = pd.read_csv('StarBucks/User_Data.csv')
Data = pd.read_csv('StarBucks/Data.csv')
The objective of the exercise is determining the best offer type for a given user. This can be done via a classification method that provides a probability as well. Here we use a Keras Sequential neural network with sigmoid outputs, which yields an independent probability for each offer type (a multi-label classification setup).
# Drop columns that will not be used as model features.
# NOTE(review): these appear to be outcome/aggregate columns (completions,
# rewards, difficulty) excluded to avoid leaking results into the features —
# confirm intent.
User_Data = User_Data.drop(['No_Offer', 'BOGO_comp', 'Info_comp', 'Disc_comp', 'Tot_Rewards_Rec', 'Offer_Difficulty'], axis=1)
# Target: label column name -> human-readable name, used for plots and tables.
Target = {'BOGO_offer':'BOGO Offers', 'Disc_offer': 'Discount Offers','Info_offer':'Informational Offers'}
Furthermore, let's look at the variance of our dataset features.
# Show per-feature variance (targets excluded), largest first, as a styled
# table with an orange-red background gradient.
display(User_Data.drop(columns = list(Target.keys())).var().sort_values(ascending = False).to_frame(name= 'Variance')\
.style.background_gradient(cmap='OrRd').set_precision(2))
Next, we standardize the features by removing the mean and scaling to unit variance. An earlier article demonstrated the benefits of scaling data using StandardScaler().
# Standardize every non-target column to zero mean / unit variance.
feature_cols = list(set(User_Data.columns.tolist()) - set(Target.keys()))
scaler = StandardScaler()
User_Data[feature_cols] = scaler.fit_transform(User_Data[feature_cols])
# Confirm the scaling: post-transform variances should all be ~1.
Temp = User_Data[feature_cols].var().sort_values(ascending=False).to_frame(name='Variance')
display(Temp.style.background_gradient(cmap=sns.light_palette("green", as_cmap=True)).set_precision(2))
def Correlation_Plot(Df, Fig_Size):
    """Draw a lower-triangle correlation heatmap (diagonal shown) of Df's columns."""
    corr = Df.corr().round(2)
    # Mask the strict upper triangle; k=1 leaves the diagonal visible.
    hide_upper = np.triu(np.ones(corr.shape, dtype=bool), k=1)
    fig, ax = plt.subplots(figsize=(Fig_Size, Fig_Size))
    sns.heatmap(corr, ax=ax, mask=hide_upper, annot=True, square=True,
                cmap=sns.color_palette("Greens", n_colors=10),
                linewidths=0.2, vmin=0, vmax=1, cbar_kws={"shrink": .6})
# Human-readable labels for the correlation heatmap axes.
# Fixes vs. the original dict: (1) 'otal Transaction Count' typo; (2) the key
# 'BOGO_Offer_Rec' was listed twice, so the second value silently overrode the
# first; (3) the target columns are lower-case ('BOGO_offer', 'Disc_offer',
# 'Info_offer' — see Target above), so the mis-cased keys never matched and
# those columns were left unrenamed.
Feat_Dict = {'BOGO_Offer_Rec':'BOGO Offer Received', 'Difficulty_per_Offer':'Difficulty per Offer',
             'Disc_Offer_Rec':'Discount Offer Received', 'Gender_Female':'Gender: Female',
             'Gender_Male':'Gender: Male', 'Gender_Other':'Gender: Other', 'Info_Offer_Rec':'Informational Offer Received',
             'Member_Tenure':'Member Tenure', 'Offer_Comp_Rec_Ratio': 'Offer Completed Receive Ratio',
             'Offer_Comp_View_Ratio':'Viewed Offer Completed Ratio', 'Offer_Tran_Cnt_Ratio':'Offer Transaction Count Ratio' ,
             'Offer_Trans_Amnt':'Offer Transaction Amount', 'Offer_Trans_Amnt_Ratio':'Offer Transaction Amount Ratio',
             'Offer_View': 'Viewed Offer', 'Reward_per_Offer': 'Reward per Offer',
             'Tot_Tran_Amnt':'Total Transaction Amount', 'Tot_Tran_Cnt':'Total Transaction Count',
             'Tran_Amnt_per_Offer':'Transactions Amount per Offer','offer_comp': 'Offer Completed',
             'Ave_Tran_Amnt': 'Average Transaction Amount', 'BOGO_offer':'BOGO Offer',
             'Disc_offer':'Discount Offer', 'Info_offer': 'Informational Offer'}
# Rename a copy (leave User_Data's column names intact for modeling) and plot.
Temp = User_Data.copy()
Temp.rename(columns = Feat_Dict, inplace = True)
Correlation_Plot(Temp, 14)
# Separate features from the three binary offer labels and hold out 30% for
# testing; the fixed seed keeps the split reproducible.
label_cols = list(Target.keys())
X = User_Data.drop(columns=label_cols)
y = User_Data[label_cols].astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# One-line summary table of the resulting set shapes.
pd.DataFrame(data={'Set': ['X_train', 'X_test', 'y_train', 'y_test'],
                   'Shape': [X_train.shape, X_test.shape, y_train.shape, y_test.shape]}).set_index('Set').T
# Fully-connected multi-label network: three sigmoid hidden layers plus a
# sigmoid output layer producing one independent probability per offer type.
# NOTE(review): the Keras 1.x keyword 'init=' was renamed 'kernel_initializer='
# in Keras 2 (this file already imports Keras 2 paths, e.g. keras.utils.vis_utils);
# with the old keyword, Keras 2 raises a TypeError.
model = Sequential()
model.add(Dense(81, input_dim= X.shape[1], kernel_initializer='uniform', activation='sigmoid', name='Layer1'))
model.add(Dense(27, kernel_initializer='uniform', activation='sigmoid', name='Layer2'))
model.add(Dense(3, kernel_initializer='uniform', activation='sigmoid', name='Layer3'))
model.add(Dense(y.shape[1], kernel_initializer='uniform', activation='sigmoid', name='Layer4'))
model.summary()
plot_model(model, show_shapes=True, show_layer_names=True, expand_nested = True)
# Number of training iterations (epochs)
IT = int(5e2)+1
# Binary cross-entropy treats each of the three outputs as an independent
# binary label (multi-label setup).
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy','mae', 'mse'])
# Train model. NOTE(review): Keras 2 renamed 'nb_epoch' to 'epochs'; the old
# keyword is rejected by modern Keras.
history = model.fit(X_train, y_train, epochs= IT, batch_size=50, verbose=0)
# Predictions and score on the held-out test set
y_pred = model.predict(X_test)
score = model.evaluate(X_test, y_test)
score = pd.DataFrame(score, index = model.metrics_names).T
history = pd.DataFrame(history.history)
display(score.style.hide_index())
# Plot the per-epoch training curves (loss and the three tracked metrics).
curve_specs = [('loss', 'Loss', 'OrangeRed'),
               ('accuracy', 'Accuracy', 'MidnightBlue'),
               ('mae', 'Mean Absolute Error (MAE)', 'ForestGreen'),
               ('mse', 'Mean Squared Error (MSE)', 'purple')]
fig = go.Figure()
for column, label, colour in curve_specs:
    fig.add_trace(go.Scatter(x=history.index.values, y=history[column].values,
                             line=dict(color=colour, width=1.5), name=label))
# Cosmetics: centered reversed legend, white background, light-gray grid and frame.
fig.update_layout(legend=dict(y=0.5, traceorder='reversed', font_size=12))
fig.update_layout(dragmode='select', plot_bgcolor='white', height=600, hovermode='closest')
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
# Pin the axes: x spans the full training run, y the [0, 1] metric range.
fig['layout']['xaxis'].update(range=[0, history.index.values.max()])
fig['layout']['yaxis'].update(range=[0, 1.0])
fig.show()
Next, we can plot the confusion matrix for our classifier.
# Side-by-side confusion matrices: raw counts (left) and row-normalized (right).
# The multi-label rows are collapsed to a single class per user via idxmax.
Labels = list(Target.values())
# Wrap the tick labels onto two lines so they fit on the axes.
Labels = [x.replace(' ','\n') for x in Labels]
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
# Round predicted probabilities to 0/1, then take the dominant column name.
# NOTE(review): idxmax resolves ties to the first column — confirm acceptable.
Confusion_Matrix = confusion_matrix(y_test.idxmax(axis=1),
pd.DataFrame(np.round(y_pred),columns = y_test.columns).astype(int).idxmax(axis=1))
_ = sns.heatmap(Confusion_Matrix.round(2), annot=True, annot_kws={"size": 14}, cmap="Blues", ax = ax[0])
_ = ax[0].set_xlabel('Predicted labels')
_ = ax[0].set_ylabel('True labels');
_ = ax[0].set_title('Confusion Matrix');
_ = ax[0].xaxis.set_ticklabels(Labels)
_ = ax[0].yaxis.set_ticklabels(Labels)
# Normalize each row by its true-label total so rows sum to 1.
Confusion_Matrix = Confusion_Matrix.astype('float') / Confusion_Matrix.sum(axis=1)[:, np.newaxis]
_ = sns.heatmap(Confusion_Matrix.round(2), annot=True, annot_kws={"size": 14}, cmap="Greens", ax = ax[1],
linewidths = 0.2, vmin=0, vmax=1, cbar_kws={"shrink": 1})
_ = ax[1].set_xlabel('Predicted labels')
_ = ax[1].set_ylabel('True labels');
_ = ax[1].set_title('Normalized Confusion Matrix');
_ = ax[1].xaxis.set_ticklabels(Labels)
_ = ax[1].yaxis.set_ticklabels(Labels)
# Side-by-side table of exact labels vs. model predictions for the first N
# held-out users, indexed by person name.
N = 20
Test = X_test[:N]
Ind = Test.index
# Two-level column header: 'Exact'/'Predicted' on top, one offer name per
# target column underneath.
top_level = [group for group in ('Exact', 'Predicted') for _ in range(3)]
header = [np.array(top_level, dtype=str), np.array(list(Target.values()) * 2)]
Pred = pd.concat([y_test[:N].reset_index(drop=True), pd.DataFrame(model.predict(Test))], axis=1)
Pred.columns = header
del header, top_level
Pred.round(2)
# Replace the integer index with the corresponding person names from Data.
Pred.index = [row[0] for row in Data.loc[Data.index.isin(Ind), ['Person']].values]
display(Pred.round(2))